...: dfoutput['Critical Value (%s)'%key] = value

    ...: print (dfoutput)

    ...:

    ...:

    ...: test_stationarity(ts)



Results of Dickey-Fuller Test:

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

dtype: float64

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

Critical Value (10%) -2.566854

dtype: float64


In [12]: def test_stationarity(timeseries):

    ...: #Determing rolling statistics

    ...: rolmean = timeseries.rolling(window=12).mean()

    ...: rolstd = timeseries.rolling(window=12).std()

    ...: '''#Determing rolling statistics

    ...: rolmean = pd.rolling_mean(timeseries, window=12)

    ...: rolstd = pd.rolling_std(timeseries, window=12) '''

    ...: #Plot rolling statistics:

    ...: plt.plot(timeseries, color='blue',label='Original')

    ...: plt.plot(rolmean, color='red', label='Rolling Mean')

    ...: plt.plot(rolstd, color='black', label = 'Rolling Std')

    ...: plt.legend(loc='best')

    ...: plt.rcParams["figure.figsize"] = [20,20]

    ...: plt.title('Rolling Mean & Standard Deviation')

    ...: plt.show(block=False)

    ...: #Perform Dickey-Fuller test:

    ...: print ('Results of Dickey-Fuller Test:')

    ...: dftest = adfuller(timeseries, autolag='AIC')

    ...: dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])

    ...: for key,value in dftest[4].items():

    ...: dfoutput['Critical Value (%s)'%key] = value

    ...: print (dfoutput)

    ...:

    ...:

    ...: test_stationarity(ts)


Results of Dickey-Fuller Test:

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

dtype: float64

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

Critical Value (10%) -2.566854

dtype: float64


In [13]: def test_stationarity(timeseries):

    ...: #Determing rolling statistics

    ...: rolmean = timeseries.rolling(window=12).mean()

    ...: rolstd = timeseries.rolling(window=12).std()

    ...: '''#Determing rolling statistics

    ...: rolmean = pd.rolling_mean(timeseries, window=12)

    ...: rolstd = pd.rolling_std(timeseries, window=12) '''

    ...: #Plot rolling statistics:

    ...: plt.plot(timeseries, color='blue',label='Original')

    ...: plt.plot(rolmean, color='red', label='Rolling Mean')

    ...: plt.plot(rolstd, color='black', label = 'Rolling Std')

    ...: plt.legend(loc='best')

    ...: plt.rcParams["figure.figsize"] = [30,30]

    ...: plt.title('Rolling Mean & Standard Deviation')

    ...: plt.show(block=False)

    ...: #Perform Dickey-Fuller test:

    ...: print ('Results of Dickey-Fuller Test:')

    ...: dftest = adfuller(timeseries, autolag='AIC')

    ...: dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])

    ...: for key,value in dftest[4].items():

    ...: dfoutput['Critical Value (%s)'%key] = value

    ...: print (dfoutput)

    ...:

    ...:

    ...: test_stationarity(ts)


Results of Dickey-Fuller Test:

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

dtype: float64

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

Critical Value (10%) -2.566854

dtype: float64


In [14]: def test_stationarity(timeseries):

    ...: #Determing rolling statistics

    ...: rolmean = timeseries.rolling(window=12).mean()

    ...: rolstd = timeseries.rolling(window=12).std()

    ...: '''#Determing rolling statistics

    ...: rolmean = pd.rolling_mean(timeseries, window=12)

    ...: rolstd = pd.rolling_std(timeseries, window=12) '''

    ...: #Plot rolling statistics:

    ...: plt.plot(timeseries, color='blue',label='Original')

    ...: plt.plot(rolmean, color='red', label='Rolling Mean')

    ...: plt.plot(rolstd, color='black', label = 'Rolling Std')

    ...: plt.legend(loc='best')

    ...: plt.rcParams["figure.figsize"] = [50,80]

    ...: plt.title('Rolling Mean & Standard Deviation')

    ...: plt.show(block=False)

    ...: #Perform Dickey-Fuller test:

    ...: print ('Results of Dickey-Fuller Test:')

    ...: dftest = adfuller(timeseries, autolag='AIC')

    ...: dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])

    ...: for key,value in dftest[4].items():

    ...: dfoutput['Critical Value (%s)'%key] = value

    ...: print (dfoutput)

    ...:

    ...:

    ...: test_stationarity(ts)


Results of Dickey-Fuller Test:

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

dtype: float64

Test Statistic -4.456561

p-value 0.000235

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

Critical Value (10%) -2.566854

dtype: float64


In [15]: ts_log = np.log(ts)

    ...: plt.plot(ts_log)

Out[15]: [<matplotlib.lines.Line2D at 0x1f1c0c26828>]


In [16]: plt.rcParams["figure.figsize"] = [20,30]


In [17]: ts_log = np.log(ts)

    ...: plt.plot(ts_log)

Out[17]: [<matplotlib.lines.Line2D at 0x1f1c069a470>]


In [18]: plt.rcParams["figure.figsize"] = [20,10]


In [19]: ts_log = np.log(ts)

    ...: plt.plot(ts_log)

Out[19]: [<matplotlib.lines.Line2D at 0x1f1ba477860>]


In [20]: moving_avg = ts_log.rolling(window=12).mean()

    ...: plt.plot(ts_log)

    ...: plt.plot(moving_avg, color='red')

Out[20]: [<matplotlib.lines.Line2D at 0x1f1c0afacc0>]


In [21]: ts_log_moving_avg_diff = ts_log - moving_avg

    ...: ts_log_moving_avg_diff.dropna(inplace=True)

    ...: test_stationarity(ts_log_moving_avg_diff)


Results of Dickey-Fuller Test:

Test Statistic -24.228718

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18231.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -24.228718

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18231.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861699

dtype: float64

Test Statistic -24.228718

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18231.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861699

Critical Value (10%) -2.566854

dtype: float64


In [22]: expwighted_avg = ts_log.ewm( halflife=12).mean()

    ...: plt.plot(ts_log)

    ...: plt.plot(expwighted_avg, color='red')

Out[22]: [<matplotlib.lines.Line2D at 0x1f1bfd1a160>]


In [23]: plt.rcParams["figure.figsize"] = [20,10]


In [24]: expwighted_avg = ts_log.ewm( halflife=12).mean()

    ...: plt.plot(ts_log)

    ...: plt.plot(expwighted_avg, color='red')

Out[24]: [<matplotlib.lines.Line2D at 0x1f1c0961e80>]


In [25]: ts_log_diff = ts_log - ts_log.shift()

    ...: plt.plot(ts_log_diff)

Out[25]: [<matplotlib.lines.Line2D at 0x1f1c2b27710>]


In [26]: ts_log_ewma_diff = ts_log - expwighted_avg

    ...: test_stationarity(ts_log_ewma_diff)


Results of Dickey-Fuller Test:

Test Statistic -21.006745

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -21.006745

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

dtype: float64

Test Statistic -21.006745

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18242.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

Critical Value (10%) -2.566854

dtype: float64


In [27]: ts_log_diff.dropna(inplace=True)

    ...: test_stationarity(ts_log_diff)


Results of Dickey-Fuller Test:

Test Statistic -32.041628

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18241.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -32.041628

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18241.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

dtype: float64

Test Statistic -32.041628

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18241.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

Critical Value (10%) -2.566854

dtype: float64


In [28]: plt.rcParams["figure.figsize"] = [20,10]


In [29]: def test_stationarity(timeseries):

    ...: #Determing rolling statistics

    ...: rolmean = timeseries.rolling(window=12).mean()

    ...: rolstd = timeseries.rolling(window=12).std()

    ...: '''#Determing rolling statistics

    ...: rolmean = pd.rolling_mean(timeseries, window=12)

    ...: rolstd = pd.rolling_std(timeseries, window=12) '''

    ...: #Plot rolling statistics:

    ...: plt.plot(timeseries, color='blue',label='Original')

    ...: plt.plot(rolmean, color='red', label='Rolling Mean')

    ...: plt.plot(rolstd, color='black', label = 'Rolling Std')

    ...: plt.legend(loc='best')

    ...:

    ...: plt.title('Rolling Mean & Standard Deviation')

    ...: plt.show(block=False)

    ...: #Perform Dickey-Fuller test:

    ...: print ('Results of Dickey-Fuller Test:')

    ...: dftest = adfuller(timeseries, autolag='AIC')

    ...: dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])

    ...: for key,value in dftest[4].items():

    ...: dfoutput['Critical Value (%s)'%key] = value

    ...: print (dfoutput)


In [30]: ts_log_diff.dropna(inplace=True)

    ...: test_stationarity(ts_log_diff)


Results of Dickey-Fuller Test:

Test Statistic -32.041628

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18241.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -32.041628

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18241.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

dtype: float64

Test Statistic -32.041628

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18241.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861698

Critical Value (10%) -2.566854

dtype: float64


In [31]: from statsmodels.tsa.seasonal import seasonal_decompose

    ...: decomposition = seasonal_decompose(ts_log)

    ...: trend = decomposition.trend

    ...: seasonal = decomposition.seasonal

    ...: residual = decomposition.resid


In [32]: plt.subplot(411)

    ...: plt.plot(ts_log, label='Original')

    ...: plt.legend(loc='best')

    ...: plt.subplot(412)

    ...: plt.plot(trend, label='Trend')

    ...: plt.legend(loc='best')

    ...: plt.subplot(413)

    ...: plt.plot(seasonal,label='Seasonality')

    ...: plt.legend(loc='best')

    ...: plt.subplot(414)

    ...: plt.plot(residual, label='Residuals')

    ...: plt.legend(loc='best')

    ...: plt.tight_layout()



In [33]: ts_log_decompose = residual

    ...: ts_log_decompose.dropna(inplace=True)

    ...: test_stationarity(ts_log_decompose)


Results of Dickey-Fuller Test:

Test Statistic -32.205683

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18218.000000

Critical Value (1%) -3.430709

dtype: float64

Test Statistic -32.205683

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18218.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861699

dtype: float64

Test Statistic -32.205683

p-value 0.000000

#Lags Used 45.000000

Number of Observations Used 18218.000000

Critical Value (1%) -3.430709

Critical Value (5%) -2.861699

Critical Value (10%) -2.566854

dtype: float64


In [34]: from statsmodels.tsa.stattools import acf, pacf

    ...: lag_acf = acf(ts_log_diff, nlags=20)

    ...: lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')

    ...: #Plot ACF:

    ...: plt.subplot(121)

    ...: plt.plot(lag_acf)

    ...: plt.axhline(y=0,linestyle='--',color='gray')

    ...: plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')

    ...: plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')

    ...: plt.title('Autocorrelation Function')

Out[34]: Text(0.5, 1.0, 'Autocorrelation Function')


In [35]: plt.subplot(122)

    ...: plt.plot(lag_pacf)

    ...: plt.axhline(y=0,linestyle='--',color='gray')

    ...: plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')

    ...: plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')

    ...: plt.title('Partial Autocorrelation Function')

    ...: plt.tight_layout()



In [36]: from statsmodels.tsa.arima_model import ARIMA

    ...:

    ...: #AR Model

    ...: model = ARIMA(ts_log, order=(1, 1, 0))

    ...: results_AR = model.fit(disp=-1)

    ...: plt.plot(ts_log_diff)

    ...: plt.plot(results_AR.fittedvalues, color='red')

    ...: plt.title('RSS: %.4f'% sum((results_AR.fittedvalues-ts_log_diff)**2))

C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:171: ValueWarning: No frequency information was provided, so inferred frequency H will be used.

% freq, ValueWarning)

C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:171: ValueWarning: No frequency information was provided, so inferred frequency H will be used.

% freq, ValueWarning)

C:\ProgramData\Anaconda3\lib\site-packages\scipy\signal\signaltools.py:1341: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.

out_full[ind] += zi

C:\ProgramData\Anaconda3\lib\site-packages\scipy\signal\signaltools.py:1344: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.

out = out_full[ind]

C:\ProgramData\Anaconda3\lib\site-packages\scipy\signal\signaltools.py:1350: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.

zf = out_full[ind]

Out[36]: Text(0.5, 1.0, 'RSS: 2718.0936')


In [37]: model = ARIMA(ts_log, order=(0, 1, 1))

    ...: results_MA = model.fit(disp=-1)

    ...: plt.plot(ts_log_diff)

    ...: plt.plot(results_MA.fittedvalues, color='red')

    ...: plt.title('RSS: %.4f'% sum((results_MA.fittedvalues-ts_log_diff)**2))

C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:171: ValueWarning: No frequency information was provided, so inferred frequency H will be used.

% freq, ValueWarning)

C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:171: ValueWarning: No frequency information was provided, so inferred frequency H will be used.

% freq, ValueWarning)

Out[37]: Text(0.5, 1.0, 'RSS: 2714.9878')


In [38]: model = ARIMA(ts_log, order=(1, 1, 1))

    ...: results_ARIMA = model.fit(disp=-1)

    ...: plt.plot(ts_log_diff)

    ...: plt.plot(results_ARIMA.fittedvalues, color='red')

    ...: plt.title('RSS: %.4f'% sum((results_ARIMA.fittedvalues-ts_log_diff)**2))

C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:171: ValueWarning: No frequency information was provided, so inferred frequency H will be used.

% freq, ValueWarning)

C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:171: ValueWarning: No frequency information was provided, so inferred frequency H will be used.

% freq, ValueWarning)

Out[38]: Text(0.5, 1.0, 'RSS: 2640.9693')


In [39]: predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy=True)

    ...: predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()

    ...:

    ...: predictions_ARIMA_log = pd.Series(ts_log.ix[0], index=ts_log.index)

    ...: predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum,fill_value=0)

    ...:

    ...: predictions_ARIMA = np.exp(predictions_ARIMA_log)

    ...: plt.plot(ts)

    ...: plt.plot(predictions_ARIMA)

    ...:

    ...: plt.title('RMSE: %.4f'% np.sqrt(sum((predictions_ARIMA-ts)**2)/len(ts)))

__main__:4: DeprecationWarning:

.ix is deprecated. Please use

.loc for label based indexing or

.iloc for positional indexing


See the documentation here:

http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated

Out[39]: Text(0.5, 1.0, 'RMSE: 5850131.2366')


In [40]: